#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

from pymongo import MongoClient, IndexModel, ASCENDING, DESCENDING


# Connect to MongoDB
def InitMongoDB(MONGODB_HOST, MONGODB_PORT, MONGODB_DB_NAME, MONGODB_COLLECTION_NAME):
    """Open a MongoDB client and look up one collection through it.

    Args:
        MONGODB_HOST: Host passed to ``MongoClient``.
        MONGODB_PORT: Port passed to ``MongoClient``.
        MONGODB_DB_NAME: Name of the database to select on the client.
        MONGODB_COLLECTION_NAME: Name of the collection to select in that
            database.

    Returns:
        A ``(client, collection)`` tuple. The client is handed back alongside
        the collection so the caller can close it when finished.
    """
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    target_collection = connection[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]
    return connection, target_collection


# Close the MongoDB connection
def CloseMongoDB(MongoDB_Connection):
    """Close the given connection by calling its ``close()`` method.

    Args:
        MongoDB_Connection: The client object to close (the first element of
            the tuple returned by ``InitMongoDB``).
    """
    shutdown = MongoDB_Connection.close
    shutdown()


def _LoadWordSet(wordlist_path):
    """Return the set of non-empty, whitespace-stripped lines of a text file."""
    # NOTE(review): the file is read with the platform default encoding, as
    # before; pass an explicit encoding here if the list is not plain ASCII.
    with open(wordlist_path, "r") as infile:
        stripped_lines = (line.strip() for line in infile)
        # Blank lines are skipped so that an empty string can never end up in
        # the lookup set and match an empty synonym.
        return {word for word in stripped_lines if word}


def _CollectSynonymHits(source_label, collection_name, words, hits,
                        mongodb_host, mongodb_port, mongodb_db_name):
    """Record every synonym of one collection that also occurs in ``words``.

    Each entry of a document's ``dms_synonym_extend`` list is lower-cased and
    looked up in ``words``. On a match, the string
    ``"<source_label>:<dms_id>:<dms_name>"`` is appended to ``hits[synonym]``.
    ``hits`` is modified in place.
    """
    projection = {"_id": 0, "dms_id": 1, "dms_name": 1, "dms_synonym_extend": 1}
    client, collection = InitMongoDB(mongodb_host, mongodb_port, mongodb_db_name, collection_name)
    try:
        for record in collection.find({}, projection):
            # A document without a synonym list simply contributes nothing.
            for synonym in record.get("dms_synonym_extend") or ():
                synonym = synonym.lower()
                if synonym in words:
                    origin = source_label + ":" + record["dms_id"] + ":" + record["dms_name"]
                    hits.setdefault(synonym, []).append(origin)
    finally:
        # Release the connection even when the scan fails part-way through.
        CloseMongoDB(client)


def SearchCadidates(wordlist_path="D:/cet4_wordlist.txt",
                    output_path="D:/candidate_stopwords.tsv",
                    mongodb_host="10.188.188.22", mongodb_port=27017,
                    mongodb_db_name="Pre_Data"):
    """Find database synonyms that are also words of a word list.

    The word list is loaded into a set, then the ``HGNC`` and
    ``drugbank_v2_new`` collections are scanned in that order. Every
    lower-cased synonym that occurs in the word list is reported as one
    tab-separated line in the output file:

        <word> TAB <number of matches> TAB <origin>; <origin>; ...

    where each origin is ``HGNC:<dms_id>:<dms_name>`` or
    ``Drugbank:<dms_id>:<dms_name>``. The word and its count are also
    printed to standard output.

    Args:
        wordlist_path: Text file with one word per line.
        output_path: TSV file to write; it is overwritten.
        mongodb_host: MongoDB host to connect to.
        mongodb_port: MongoDB port to connect to.
        mongodb_db_name: Database holding both collections.

    All arguments default to the values that used to be hard-coded, so
    calling ``SearchCadidates()`` behaves as before.
    """
    words = _LoadWordSet(wordlist_path)

    # Maps a matched word to the list of its origins; the match count is the
    # length of that list, so no separate counter is needed.
    hits = {}
    sources = (("HGNC", "HGNC"), ("Drugbank", "drugbank_v2_new"))
    for source_label, collection_name in sources:
        _CollectSynonymHits(source_label, collection_name, words, hits,
                            mongodb_host, mongodb_port, mongodb_db_name)

    # The context manager closes (and therefore flushes) the file on every
    # exit path, so no per-line flush is required.
    with open(output_path, "w") as outfile:
        for word, origins in hits.items():
            count = str(len(origins))
            print(word + "\t" + count)
            outfile.write(word + "\t" + count + "\t" + "; ".join(origins) + "\n")


# Run the search with its default settings when executed as a script.
if __name__ == "__main__":
    SearchCadidates()
